

#--------------------------------------------------------------------------------#

# Main results ####

#--------------------------------------------------------------------------------#


# Authors: WANG, Howard; CHENG, Edmund; CHEN, Xi; LIANG, Hai
# Version: 2024-Oct-30
# R version 4.4.1 (2024-06-14)
# Running under: macOS 15.1



rm(list = ls())

#Set directory#
(code_file_path = rstudioapi::getActiveDocumentContext()$path) # Get current path of this code file
setwd(dirname(code_file_path)) # dirname(): reach the upper level folder
getwd() # current directory

#Set packages#
# install.packages("tidyverse")
library(tidyverse) # sometimes we need to re-install the tidyverse package, or we cannot use "%>% select"
library(stringr) # for string manipulation
library(fixest) # for perform estimations with multiple fixed-effects
library(lme4) # for using glmer
library(sjPlot) # for tab_model
library(sjmisc) # for variable transformation
library(jiebaR) # for text segment
library(ggpubr) # arranging multiple graphs 
library(mediation) # for mediation analysis

# Solve the problem of conflict packages
library(conflicted)
conflicts_prefer(dplyr::select) # To ensure that when using select, it refers to dplyr
conflicts_prefer(mediation::mediate)

# Prepare to show Chinese characters
if (!require("showtext")) {install.packages("showtext")}
library(showtext)
showtext_auto(enable = T)


# Load data #
load(file="data/llmb_multilevel_data_final.Rdata")
d=llmb_multilevel_data_final




#--------------------------------------------------------------------------------#
# Introduction  -------------------------------------------------------------------------
#--------------------------------------------------------------------------------#
## Figure 1 #####
# Figure 1 is not generated by coding. It is generated by using https://app.diagrams.net/





#___________####
#--------------------------------------------------------------------------------#
# Data and measurement  -------------------------------------------------------------------------
#--------------------------------------------------------------------------------#

## speed
summary(d$replied_days)
# Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
# 0.00   12.00   22.00   34.61   39.00  681.00 
median(d$replied_days,na.rm = T) # 22
sd(d$replied_days,na.rm = T) # 45.79134


## Examples of DV ####
# referral
t = d |> subset(referral ==1) |> select(tid,original_post, reply_length, reply_content)
t|>subset(tid==9793676)|>select(reply_content)
# 相关问题，建议您咨询安阳市人社局，联系电话：2209703
t|>subset(tid==8712614)|>select(reply_content)
# 你好！请联系可克达拉市政府，不属于伊宁市范围。


# direct info
t = d |> subset(direct_info ==1) |> select(tid,original_post, reply_length, reply_content)
t|>subset(tid==7199959)|>select(reply_content)
# 网民您好！您反映的问题，我们向行唐县委进行了核实交办。现将情况回复如下。    经了解，南豆庄村当前共有30户31人享受低保政策，村评议工作有记录、有公示，符合工作要求。
t|>subset(tid==8567206)|>select(reply_content)
# 重阳大道东段与S206交叉口12月30日前通车。

# action 
t = d |> subset(action ==1) |> select(tid,original_post, reply_length, reply_content)
t|>subset(tid==7030850)|>select(reply_content)
# 您好！我局基教处负责人已沟通联系，您表示理解。祝你学习进步！
t|>subset(tid==6932098)|>select(reply_content)
# 网民您好！您反映的问题，我们向藁城区委进行了核实交办。现将情况回复如下。    据反馈，您家已接通临时用电，村委会将按期为您配送生活饮用水。工作人员已与您联系，您表示家里已通电，饮水充足。



# resolved 
t = d |> subset(resolved ==1) |> select(tid,original_post, reply_length, reply_content)
t|>subset(tid==8224176)|>select(reply_content)
# 网民您好，您反映的问题，我们向元氏县委进行了核实交办。现将情况回复如下。        经协调，9月17日，项目施工单位已将17500元工资发放到您和工友手中。据反馈，您对处理结果表示满意。
t|>subset(tid==8531294)|>select(reply_content)
# 网友您好，您的问题已收悉，翠驰出行已退款。


## Examples of IV ####
table(is.na(d$typeName)) # no missing data
# complaints
t = d |> subset(complaint ==1) |> select(tid,original_post, post_length, reply_content)
# assumption
t|>subset(tid==8246687)|>select(original_post)
# 卖海鲜的短斤少两，欺骗消费者，买了六斤螃蟹中间少了两斤多。
# house 
t|>subset(tid==7968529)|>select(original_post)
# 香榭华都小区购房八年，至今办不了房产证。
# noise
t|>subset(tid==11420963)|>select(original_post)
# 威青一级高速噪音过大，影响小区居民休息，噪音非常大
# heating
t|>subset(tid==8900554)|>select(original_post)
# 唐人中心B栋家里不到18度，居民投诉无门！政府如何为百姓做主？

# non-complaints: inquiry, seeking help, suggestion, and thank-you
t = d |> subset(complaint ==0) |> select(tid, typeName, original_post, post_length, reply_content)
# inquiry 
t|>subset(tid==11766704)|>select(original_post)
# 原平南滩村何时安装天然气？望尽快回复。。

# seeking help
t|>subset(tid==7829888)|>select(original_post)
# 六年了房屋一直拖欠未解决，希望领导重视。

# suggestion
t|>subset(tid==10756021)|>select(original_post)
# 希望鄂州公交有公交实时路径查询，距离本站还要多久 ，以免错过时间，望微信能够上线小程序

# thank-you
t|>subset(tid==7723960)|>select(original_post)
# 上次留言之后，苑东街道办事处的主任亲自带人来居民家中查看漏雨情况，现场联系维修人员，马上就给维修，感谢各位领导，我们的民生问题得到了回复及解决，万分感谢！





## Figure 2 Data. Variation of reply numbers ####

# day
par(mfrow=c(1,1))
df1= d %>% dplyr::group_by(reply_ymd)%>%
  dplyr::summarise(day_reply_n=n())

(mean_day_reply <- mean(df1$day_reply_n, na.rm = TRUE))
ceiling(mean_day_reply)

# Create the plot with the horizontal line for the mean
day_reply <- df1 |>
  ggplot(aes(x = reply_ymd, y = day_reply_n)) + 
  geom_line(size = 0.7) + 
  theme_bw(base_size = 12, base_family = 'Times New Roman') +
  xlab("Day (1 Jan. 2020 - 31 Dec. 2021)") + 
  ylab("Number of replies") +
  scale_x_date(date_labels = "%b", date_breaks = "2 month") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    text = element_text(size = 12, colour = "black"),
    axis.title.x = element_text(size = 16, colour = "black"), 
    axis.title.y = element_text(size = 16, colour = "black"), 
    axis.text.x = element_text(size = 9, colour = "black"), 
    axis.text.y = element_text(size = 9, colour = "black")
  ) +
  geom_hline(yintercept = mean_day_reply, linetype = "dashed", color = "grey", size = 1) + 
  ylim(0, 1500) +
  annotate("text", x = as.Date("2020-01-01"), y = mean_day_reply + 50, 
           label = paste("Mean =", ceiling(mean_day_reply)), color = "black", size = 4, hjust = 0)

## month
df2= d %>% dplyr::group_by(reply_ym)%>%
  dplyr::summarise(month_reply_n=n())

(mean_month_reply <- mean(df2$month_reply_n, na.rm = TRUE))
ceiling(mean_month_reply)

month_reply <- ggplot(df2, aes(x = reply_ym, y = month_reply_n)) + 
  geom_line(size = 0.7) + 
  theme_bw(base_size = 12, base_family = 'Times New Roman') +
  xlab("Month (Jan. 2020 - Dec. 2021)") + 
  ylab("Number of replies") +
  scale_x_date(date_labels = "%b", date_breaks = "2 month") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    text = element_text(size = 12, colour = "black"),
    axis.title.x = element_text(size = 16, colour = "black"), 
    axis.title.y = element_text(size = 16, colour = "black"), 
    axis.text.x = element_text(size = 9, colour = "black"), 
    axis.text.y = element_text(size = 9, colour = "black")
  ) +
  geom_hline(yintercept = mean_month_reply, linetype = "dashed", color = "grey", size = 1) + 
  ylim(0, 20000) +
  annotate("text", x = as.Date("2020-01-01"), y = mean_month_reply + 700, 
           label = paste("Mean =", ceiling(mean_month_reply)), color = "black", size = 4, hjust = 0)

ggpubr::ggarrange(day_reply,month_reply, 
          ncol=1, nrow=2, 
          font.label = list(size = 9, color = "blue", face = "bold", family = ""),
          labels = c("A","B"))
ggsave("output/main/Figure 2 Temporal trend of reply number by day and month.pdf",width=12,height=6) 



## Figure 3 The geo distribution of DV and IV ##########
df <- d %>%
  group_by(pref_ch,pref_english_short,prefcode)%>%
  summarise(
    ave_concrete = mean(concrete, na.rm = T),
    ave_action = mean(action, na.rm = T),
    ave_resolved = mean(resolved, na.rm = T),
    ave_reply_length = mean(reply_length, na.rm = T),
    ave_complaint = mean(complaint, na.rm = T)
  )

# Load the necessary libraries
pacman::p_load(ggplot2,tidyverse,extrafont,RColorBrewer,stringi,readxl,sjPlot,devtools,sp,sf, mapstools)

# Read files by a particular projection pattern
x <- sf::st_read("data/geo/CN-shi-A.shp") |> st_transform("+proj=longlat +ellps=clrk66") 

# Prepare for showing Chinese characters
x$map_cityname = stri_enc_toutf8(x$CityNameC) # ensure the format of string # Or stringi::stri_encode(string_object, "gb2312", "utf-8")

# Revise two errors
x$map_cityname = ifelse(x$map_cityname=="荷泽市", "菏泽市",x$map_cityname)
x$map_cityname = ifelse(x$map_cityname=="襄樊市", "襄阳市",x$map_cityname)
unique(x$map_cityname)
t = table(x$sheng) |> as.data.frame()

# find the shared city name
cityname_list = c("石家庄|唐山|秦皇岛|邯郸|邢台|保定|张家口|承德|沧州|廊坊|衡水|太原|大同|阳泉|长治|晋城|朔州|晋中|运城|忻州|临汾|吕梁|呼和浩特|包头|乌海|赤峰|通辽|鄂尔多斯|呼伦贝尔|巴彦淖尔|乌兰察布|兴安|锡林郭勒|阿拉善|沈阳|大连|鞍山|抚顺|本溪|丹东|锦州|营口|阜新|辽阳|盘锦|铁岭|朝阳|葫芦岛|长春|吉林|四平|辽源|通化|白山|松原|白城|延边|哈尔滨|齐齐哈尔|鸡西|鹤岗|双鸭山|大庆|伊春|佳木斯|七台河|牡丹江|黑河|绥化|大兴安岭|南京|无锡|徐州|常州|苏州|南通|连云港|淮安|盐城|扬州|镇江|泰州|宿迁|杭州|宁波|温州|嘉兴|湖州|绍兴|金华|衢州|舟山|台州|丽水|合肥|芜湖|蚌埠|淮南|马鞍山|淮北|铜陵|安庆|黄山|滁州|阜阳|宿州|六安|亳州|池州|宣城|福州|厦门|莆田|三明|泉州|漳州|南平|龙岩|宁德|南昌|景德镇|萍乡|九江|新余|鹰潭|赣州|吉安|宜春|抚州|上饶|济南|青岛|淄博|枣庄|东营|烟台|潍坊|济宁|泰安|威海|日照|临沂|德州|聊城|滨州|菏泽|郑州|开封|洛阳|平顶山|安阳|鹤壁|新乡|焦作|濮阳|许昌|漯河|三门峡|南阳|商丘|信阳|周口|驻马店|武汉|黄石|十堰|宜昌|襄阳|鄂州|荆门|孝感|荆州|黄冈|咸宁|随州|恩施|长沙|株洲|湘潭|衡阳|邵阳|岳阳|常德|张家界|益阳|郴州|永州|怀化|娄底|湘西|广州|韶关|深圳|珠海|汕头|佛山|江门|湛江|茂名|肇庆|惠州|梅州|汕尾|河源|阳江|清远|东莞|中山|潮州|揭阳|云浮|南宁|柳州|桂林|梧州|北海|防城港|钦州|贵港|玉林|百色|贺州|河池|来宾|崇左|海口|三亚|三沙|儋州|成都|自贡|攀枝花|泸州|德阳|绵阳|广元|遂宁|内江|乐山|南充|眉山|宜宾|广安|达州|雅安|巴中|资阳|阿坝|甘孜|凉山|贵阳|六盘水|遵义|安顺|毕节|铜仁|黔西南|黔东南|黔南|昆明|曲靖|玉溪|保山|昭通|丽江|普洱|临沧|楚雄|红河|文山|西双版纳|大理|德宏|怒江|迪庆|拉萨|日喀则|昌都|林芝|山南|那曲|阿里|西安|铜川|宝鸡|咸阳|渭南|延安|汉中|榆林|安康|商洛|兰州|嘉峪关|金昌|白银|天水|武威|张掖|平凉|酒泉|庆阳|定西|陇南|临夏|甘南|西宁|海东|海北|黄南|海南|果洛|玉树|海西|银川|石嘴山|吴忠|固原|中卫|乌鲁木齐|克拉玛依|吐鲁番|哈密|昌吉|博尔塔拉|巴音郭楞|阿克苏|克孜勒苏|喀什|和田|伊犁|塔城|阿勒泰")
x$pref_ch_short <- x$map_cityname |> str_extract(cityname_list)
# x_cleaned = subset(x, !is.na(pref_ch_short))

df$pref_ch_short <- df$pref_ch |> str_extract(cityname_list) # df$pref_ch
df_cleaned = subset(df, !is.na(pref_ch_short))


# Join df data to x
x_df <- left_join(x, df_cleaned, by="pref_ch_short")

# Convert the data frame to an sf object
china_map_sf <- st_as_sf(x_df, wkt = "geometry")

# check
check = subset(china_map_sf, china_map_sf$SHI!=china_map_sf$prefcode)

# Plot the map
p_dv = ggplot() +
  theme_void() + # To set the theme of the plot
  geom_sf(data = china_map_sf, aes(fill = ave_concrete), color = "grey") + # To decide the contents filled in the polygon
  scale_fill_distiller(palette = "Reds", direction = 1) + # The palette argument is set to "Reds", which specifies a gradient of red colors. The direction argument is set to 1, indicating that the gradient will be from low to high values.
  guides(fill = guide_legend(title = "Ratio of \nconcrete reply")) +
  theme(
    legend.position = "right", 
    legend.margin = ggplot2::margin(0, 0, 0, -10), # top, right, bottom, and left.
    legend.title = element_text(size = 20), # Increase legend title size
    legend.text = element_text(size = 20),  # Increase legend text size
    plot.title = element_text(hjust = 0.5, size = 24, face = "bold"),
    plot.margin = ggplot2::margin(0, 0, 0, 0) # top, right, bottom, and left.
  ) + 
  labs(title = "Geographic distribution of dependent variable")


p_iv = ggplot() +
  theme_void() + # To set the theme of the plot
  geom_sf(data = china_map_sf, aes(fill = ave_complaint), color = "grey") + # To decide the contents filled in the polygon
  scale_fill_distiller(palette = "Reds", direction = 1) + # The palette argument is set to "Reds", which specifies a gradient of red colors. The direction argument is set to 1, indicating that the gradient will be from low to high values.
  guides(fill = guide_legend(title = "Ratio of \nonline complaint")) +
  theme(
    legend.position = "right", 
    legend.margin = ggplot2::margin(0, 0, 0, -10), # top, right, bottom, and left.
    legend.title = element_text(size = 20), # Increase legend title size
    legend.text = element_text(size = 20),  # Increase legend text size
    plot.title = element_text(hjust = 0.5, size = 24, face = "bold"),
    plot.margin = ggplot2::margin(-10, 0, 0, 0) # top, right, bottom, and left.
  ) + 
  labs(title = "Geographic distribution of independent variable")

map = ggarrange(p_dv, p_iv, ncol = 2)
ggsave(plot = map, "output/main/Figure 3 Geo distribution of dv and iv.jpg", width = 15.5, height = 7, dpi = 160, quality = 1000)




## Table 1: Variable summary #####
# Descriptive analysis to show these exist non-Contentious claims and related replies. 

dt <- d %>% 
  dplyr::select(reply_ym, concrete, action, resolved, 
                complaint,  # treatment
                # protest_risk,from_hotline12345, # risk perception
                post_length, phone, topic,  # post controls
                male, age, han_race,edu_shuangyiliu_bin, edu_phd,tenure_length_m, # leader controls
                pop_log,gdp_pc_log, revenue_log,  # city controls
                replied_days, reply_length, # reply controls
                # reply_length_log, 
                normalized_negative_score,
                subject_citizenship,legal_citizenship,socialist_citizenship,
                protest_risk, from_hotline12345
  )


stargazer::stargazer(dt,
                     type = "text", # use "html" or "latex" for output to html or latex
                     title = "Table 1. Descriptive Statistics",
                     label = "tab:desc_stats",
                     digits = 3, # number of decimal places
                     header = FALSE, 
                     summary.stat = c("n", "mean", "median", "sd", "min", "max"),
                     out = "output/main/Table 1 Variables summary.html" ) 










#____________####
#--------------------------------------------------------------------------------#
# H1: Complaint and responsiveness quality ####
#--------------------------------------------------------------------------------#



## Figure 4. Divergence trend between complaint and noncomplaint ####
# Aim: to show the different time trends between complaint and non-complaint


d1 = d %>% subset(complaint == 1) # contentious dataset
d2 = d %>% subset(complaint == 0) # non-contentious dataset

# concreteness issue only exists in threads that have been replied 

d1_replied = d1 %>% subset(reply_ym >= ym(202001) & reply_ym <= ym(202112) ) %>% subset(replied ==1)
d1_replied_n = d1_replied %>% group_by(reply_ym) %>% summarise(replied_n = n())
d1_replied_concrete_n = d1_replied %>% subset(concrete ==1) %>% group_by(reply_ym) %>% summarise(concrete_n= n())
d1_replied_concrete_ratio = merge(d1_replied_n, d1_replied_concrete_n, by= "reply_ym")
d1_replied_concrete_ratio$ratio = d1_replied_concrete_ratio$concrete_n/d1_replied_concrete_ratio$replied_n
d1_replied_concrete_ratio$type= "complaint"


d2_replied = d2 %>% subset(reply_ym >= ym(202001) & reply_ym <= ym(202112) ) %>% subset(replied ==1)
d2_replied_n = d2_replied %>% group_by(reply_ym) %>% summarise(replied_n = n())
d2_replied_concrete_n = d2_replied %>% subset(concrete ==1) %>% group_by(reply_ym) %>% summarise(concrete_n= n())
d2_replied_concrete_ratio = merge(d2_replied_n, d2_replied_concrete_n, by= "reply_ym")
d2_replied_concrete_ratio$ratio = d2_replied_concrete_ratio$concrete_n/d2_replied_concrete_ratio$replied_n
d2_replied_concrete_ratio$type= "non-complaint"

# replied_concrete_ratio 
replied_concrete_ratio= rbind(d1_replied_concrete_ratio, d2_replied_concrete_ratio)


# action as reply 
d1_replied = d1 %>% subset(reply_ym >= ym(202001) & reply_ym <= ym(202112) ) %>% subset(replied ==1)
d1_replied_n = d1_replied %>% group_by(reply_ym) %>% summarise(replied_n = n())
d1_replied_action_n = d1_replied %>% subset(action ==1) %>% group_by(reply_ym) %>% summarise(action_n= n())
d1_replied_action_ratio = merge(d1_replied_n, d1_replied_action_n, by= "reply_ym")
d1_replied_action_ratio$ratio = d1_replied_action_ratio$action_n/d1_replied_action_ratio$replied_n
d1_replied_action_ratio$type= "complaint"


d2_replied = d2 %>% subset(reply_ym >= ym(202001) & reply_ym <= ym(202112) ) %>% subset(replied ==1)
d2_replied_n = d2_replied %>% group_by(reply_ym) %>% summarise(replied_n = n())
d2_replied_action_n = d2_replied %>% subset(action ==1) %>% group_by(reply_ym) %>% summarise(action_n= n())
d2_replied_action_ratio = merge(d2_replied_n, d2_replied_action_n, by= "reply_ym")
d2_replied_action_ratio$ratio = d2_replied_action_ratio$action_n/d2_replied_action_ratio$replied_n
d2_replied_action_ratio$type= "non-complaint"

# replied_action_ratio 
replied_action_ratio= rbind(d1_replied_action_ratio, d2_replied_action_ratio)


# resolved as reply 
d1_replied = d1 %>% subset(reply_ym >= ym(202001) & reply_ym <= ym(202112) ) %>% subset(replied ==1)
d1_replied_n = d1_replied %>% group_by(reply_ym) %>% summarise(replied_n = n())
d1_replied_resolved_n = d1_replied %>% subset(resolved ==1) %>% group_by(reply_ym) %>% summarise(resolved_n= n())
d1_replied_resolved_ratio = merge(d1_replied_n, d1_replied_resolved_n, by= "reply_ym")
d1_replied_resolved_ratio$ratio = d1_replied_resolved_ratio$resolved_n/d1_replied_resolved_ratio$replied_n
d1_replied_resolved_ratio$type= "complaint"


d2_replied = d2 %>% subset(reply_ym >= ym(202001) & reply_ym <= ym(202112) ) %>% subset(replied ==1)
d2_replied_n = d2_replied %>% group_by(reply_ym) %>% summarise(replied_n = n())
d2_replied_resolved_n = d2_replied %>% subset(resolved ==1) %>% group_by(reply_ym) %>% summarise(resolved_n= n())
d2_replied_resolved_ratio = merge(d2_replied_n, d2_replied_resolved_n, by= "reply_ym")
d2_replied_resolved_ratio$ratio = d2_replied_resolved_ratio$resolved_n/d2_replied_resolved_ratio$replied_n
d2_replied_resolved_ratio$type= "non-complaint"

# replied_resolved_ratio 
replied_resolved_ratio= rbind(d1_replied_resolved_ratio, d2_replied_resolved_ratio)


### Graph

# Rename 'type' levels for all datasets
replied_concrete_ratio$type <- recode(replied_concrete_ratio$type, "complaint" = "Complaint", "non-complaint" = "Non-complaint")
replied_action_ratio$type <- recode(replied_action_ratio$type, "complaint" = "Complaint", "non-complaint" = "Non-complaint")
replied_resolved_ratio$type <- recode(replied_resolved_ratio$type, "complaint" = "Complaint", "non-complaint" = "Non-complaint")

# Create the plots
p_concrete <- replied_concrete_ratio |> 
  ggplot(aes(x = reply_ym, y = ratio, colour = type)) + 
  geom_line(size = 0.3) + geom_point() + ylim(0, 1) +
  scale_x_date(date_labels = "%b", date_breaks = "3 months") +
  xlab("Month\nFrom Jan. 2020 to Dec. 2021") + ylab("Ratio of concrete replies") +
  labs(colour = "Legend") +  # Set legend title
  theme(legend.position = "none")

p_action <- replied_action_ratio |> 
  ggplot(aes(x = reply_ym, y = ratio, colour = type)) + 
  geom_line(size = 0.3) + geom_point() + ylim(0, 1) +
  scale_x_date(date_labels = "%b", date_breaks = "3 months") +
  xlab("Month\nFrom Jan. 2020 to Dec. 2021") + ylab("Ratio of action as replies") +
  labs(colour = "Legend") + # Set legend title
  theme(legend.position = "none")


p_resolved <- replied_resolved_ratio |> 
  ggplot(aes(x = reply_ym, y = ratio, colour = type)) + 
  geom_line(size = 0.3) + geom_point() + ylim(0, 1) +
  scale_x_date(date_labels = "%b", date_breaks = "3 months") +
  xlab("Month\nFrom Jan. 2020 to Dec. 2021") + ylab("Ratio of problem resolved as replies") +
  labs(colour = "Legend") + # Set legend title
  theme(legend.position = c(0.95, 0.95), # Position legend inside the plot
        legend.justification = c("right", "top"))

# Combine the plots with a common legend
theme_set(theme_sjplot()) # set the format of graph 
ggarrange(p_concrete, p_action, p_resolved, 
          nrow = 1, ncol = 3, labels = c("A", "B", "C"))
ggsave(file = "output/main/Figure 4 Complaint vs. noncomplaint.pdf", width = 15, height = 6)





## Table 2 ####
summary(baseline1 <- glmer(concrete~complaint+
                             (1|cityid)+(0+complaint|cityid), # City cluster
                           d,
                           nAGQ = 0, control = glmerControl(optimizer = "nloptwrap", calc.derivs = FALSE),family=binomial) )


summary(baseline2 <- glmer(concrete~complaint+
                             post_length_log+phone+topic + # Thread controls
                             
                             (1|cityid)+(0+complaint|cityid), # City cluster
                           d,
                           nAGQ = 0, control = glmerControl(optimizer = "nloptwrap", calc.derivs = FALSE),family=binomial))

summary(baseline3 <- glmer(concrete~complaint+
                             post_length_log+phone+topic+ # Thread controls
                             male+age+han_race+edu_shuangyiliu_bin+edu_phd+tenure_length_m+ # Pref leader controls
                             
                             (1|cityid)+(0+complaint|cityid), # City cluster
                           d,
                           nAGQ = 0, control = glmerControl(optimizer = "nloptwrap", calc.derivs = FALSE),family=binomial))


summary(baseline4 <- glmer(concrete~complaint+
                             post_length_log+phone+topic+ # Thread controls
                             male+age+han_race+edu_shuangyiliu_bin+edu_phd+tenure_length_m+ # Pref leader controls
                             gdp_pc_log+pop_log+revenue_log+  # City econ controls 
                             
                             (1|cityid)+(0+complaint|cityid), # City cluster
                           d,
                           nAGQ = 0, control = glmerControl(optimizer = "nloptwrap", calc.derivs = FALSE),family=binomial))

# Main model (Furhter add reply controls)
summary(baseline5 <- glmer(concrete~complaint+
                             post_length_log+phone+topic+ # Thread controls
                             male+age+han_race+edu_shuangyiliu_bin+edu_phd+tenure_length_m+ # Pref leader controls
                             gdp_pc_log+pop_log+revenue_log+  # City econ controls 
                             replied_days+reply_length_log+   
                             (1|cityid)+(0+complaint|cityid), # City cluster
                           d,
                           nAGQ = 0, control = glmerControl(optimizer = "nloptwrap", calc.derivs = FALSE),family=binomial))


# Broad definition of concrete reply as DV

summary(baseline6 <- glmer(concrete_broad~complaint+
                             post_length_log+phone+topic+ # Thread controls
                             male+age+han_race+edu_shuangyiliu_bin+edu_phd+tenure_length_m+ # Pref leader controls
                             gdp_pc_log+pop_log+revenue_log+  # City econ controls 
                             replied_days+reply_length_log+   
                             (1|cityid)+(0+complaint|cityid), # City cluster
                           d,
                           nAGQ = 0, control = glmerControl(optimizer = "nloptwrap", calc.derivs = FALSE),family=binomial))


# Government action as DV
summary(baseline7 <- glmer(action~complaint+
                             post_length_log+phone+topic+ # Thread controls
                             male+age+han_race+edu_shuangyiliu_bin+edu_phd+tenure_length_m+ # Pref leader controls
                             gdp_pc_log+pop_log+revenue_log+  # City econ controls 
                             replied_days+reply_length_log+   
                             (1|cityid)+(0+complaint|cityid), # City cluster
                           d,
                           nAGQ = 0, control = glmerControl(optimizer = "nloptwrap", calc.derivs = FALSE),family=binomial))
# Resolved as Dv
summary(baseline8 <- glmer(resolved~complaint+
                             post_length_log+phone+topic+ # Thread controls
                             male+age+han_race+edu_shuangyiliu_bin+edu_phd+tenure_length_m+ # Pref leader controls
                             gdp_pc_log+pop_log+revenue_log+  # City econ controls 
                             replied_days+reply_length_log+   
                             (1|cityid)+(0+complaint|cityid), # City cluster
                           d,
                           nAGQ = 0, control = glmerControl(optimizer = "nloptwrap", calc.derivs = FALSE),family=binomial))


# save results
sjPlot::tab_model(baseline1, baseline2, baseline3,baseline4,baseline5,baseline6,baseline7,baseline8,
                  transform=NULL, # not showing odds ratio
                  show.ci=F, show.se=T, collapse.se = T, p.style="stars", # one column (p: numeric, stars (asterisk), scientific, numeric_stars)
                  digits=2, digits.p=2,
                  #terms=c("complaint"), # Only present the interested predictors
                  show.r2= F, show.aic=T,show.loglik=F,show.dev=F,show.obs=T,
                  file= "output/main/Table 2 Baseline findings.html")  



## Figure 5 ####
theme_set(theme_sjplot()) # set the format of graph 
pdf("output/main/Figure 5 Visualization of Model 5 in Table 2.pdf", width = 8, height = 7)
plot_model(baseline5, transform = NULL,
           vline.color = "grey",
           sort.est = TRUE,
           show.values = TRUE, value.offset = .3, value.size = 3,
           # axis.labels = "", 
           dot.size = 0.6,line.size = 0.3,
           width = 0.4,
           title = "Visualization of Model 5 in Table 2",
           # terms = c(),
           wrap.title = 100)
dev.off()   





## Robustness checks ####
# See the code file: "2.Appendix_results.R"



#____________####
#--------------------------------------------------------------------------------#
# H2: Why prioritizing complaints ####
#--------------------------------------------------------------------------------#
# information collection 
# legitimacy
# risk aversion 


## Table 3 ####

## Negative information collection
table(d$normalized_negative_score)
summary(why1 <- glmer(concrete~complaint+
                        post_length_log+phone+topic+ # Thread controls
                        male+age+han_race+edu_shuangyiliu_bin+edu_phd+tenure_length_m+ # Pref leader controls
                        gdp_pc_log+pop_log+revenue_log+  # City econ controls
                        replied_days+reply_length_log+  # Reply controls
                        
                        normalized_negative_score+ # 
                        
                        (1|cityid)+(0+complaint|cityid), # City cluster
                      d,
                      nAGQ = 0, control = glmerControl(optimizer = "nloptwrap", calc.derivs = FALSE),family=binomial))


## Performing citizenship
summary(d$subject_citizenship) # 0.2231
summary(d$legal_citizenship) # 0.1872 # similar to their 0.14 (performing citizenship article)
summary(d$socialist_citizenship) # 0.3204



# add citizenship
summary(why2 <- glmer(concrete~complaint+
                        post_length_log+phone+topic+ # Thread controls
                        male+age+han_race+edu_shuangyiliu_bin+edu_phd+tenure_length_m+ # Pref leader controls
                        gdp_pc_log+pop_log+revenue_log+  # City econ controls 
                        replied_days+reply_length_log+  # Reply controls
                        
                        subject_citizenship+legal_citizenship+socialist_citizenship+
                        
                        (1|cityid)+(0+complaint|cityid), # City cluster
                      d,
                      nAGQ = 0, control = glmerControl(optimizer = "nloptwrap", calc.derivs = FALSE),family=binomial))



## Risk aversion

table(d$protest_risk) # 0: 136702  1: 96302 
table(d$from_hotline12345) # 0: 225851   1: 7153 

summary(why3 <- glmer(concrete~complaint+
                        post_length_log+phone+topic+ # Thread controls
                        male+age+han_race+edu_shuangyiliu_bin+edu_phd+tenure_length_m+ # Pref leader controls
                        gdp_pc_log+pop_log+revenue_log+  # City econ controls
                        replied_days+reply_length_log+ # Reply controls
                        
                        protest_risk+from_hotline12345+ # @
                        
                        (1|cityid)+(0+complaint|cityid), # City cluster
                      d,
                      nAGQ = 0, control = glmerControl(optimizer = "nloptwrap", calc.derivs = FALSE),family=binomial))


# All in one
summary(why4 <- glmer(concrete~complaint+
                        post_length_log+phone+topic+ # Thread controls
                        male+age+han_race+edu_shuangyiliu_bin+edu_phd+tenure_length_m+ # Pref leader controls
                        gdp_pc_log+pop_log+revenue_log+  # City econ controls 
                        replied_days+reply_length_log+  # Reply controls
                        
                        normalized_negative_score+ 
                        subject_citizenship+legal_citizenship+socialist_citizenship+
                        protest_risk+from_hotline12345+ 
                        
                        (1|cityid)+(0+complaint|cityid), # City cluster
                      d,
                      nAGQ = 0, control = glmerControl(optimizer = "nloptwrap", calc.derivs = FALSE),family=binomial))


# keeping particular covariates for presenting
keep1 = c(
  "complaint",
  "normalized_negative_score",
  "subject_citizenship","legal_citizenship","socialist_citizenship",
  "protest_risk","from_hotline12345"
)

sjPlot::tab_model(why1,why2,why3,why4,
                  transform=NULL, # not showing odds ratio
                  show.ci=F, show.se=T, collapse.se = T, p.style="stars", # one column (p: numeric, stars (asterisk), scientific, numeric_stars)
                  digits=2, digits.p=2,
                  terms= keep1, # Only present the interested predictors
                  show.r2= F, show.aic=T,show.loglik=F,show.dev=F,show.obs=T,
                  file= "output/main/Table 3 Reasons of prioritizing complaints.html") 



## Figure 6 ####
relevant_vars <- c( "cityid", "concrete", "complaint", 
                    "normalized_negative_score", 
                   "subject_citizenship",
                   "from_hotline12345", "protest_risk",
                   
                   "post_length_log", "phone", "topic",
                   "male", "age", "han_race", "edu_shuangyiliu_bin", "edu_phd", "tenure_length_m",
                   "gdp_pc_log", "pop_log", "revenue_log", "replied_days", "reply_length_log"
                  )
d_complete <- d |> 
  dplyr::select(all_of(relevant_vars)) |> 
  na.omit() |>
  as.data.frame()
# Verify completeness
print(nrow(d_complete))
print(sum(is.na(d_complete)))


### Negative sentiment ####
summary(d$normalized_negative_score)
med.fit1 <- lmer(normalized_negative_score ~ complaint 
                 + (1 | cityid), 
                 data = d_complete, control = lmerControl(optimizer = "nloptwrap", calc.derivs = FALSE)
)


out.fit1 <- glmer(concrete ~ complaint*normalized_negative_score
                  + (1 + complaint | cityid), 
                  data = d_complete, 
                  family = binomial, nAGQ = 0, control = glmerControl(optimizer = "nloptwrap", calc.derivs = FALSE)
)

# Perform mediation analysis with very simplified models
gc() # clear the memory used

# Set a seed for reproducibility
set.seed(1)
med.out1 <- mediate(med.fit1, out.fit1, 
                    treat = "complaint", mediator = "normalized_negative_score", 
                    sims = 300)
# Note: if set the sims = 1000, the error report will be "Error: vector memory limit of 18.0 Gb reached, see mem.maxVSize()"

# Summarize mediation analysis results
pdf("output/main/Figure 6a Mediation.pdf", width = 7.5, height = 6) 
plot(med.out1, main="H2a: Negative Information Mechanism")
(temp=summary(med.out1))
(mediator_percent = round(temp$n.avg * 100,2)) # 6.70% are through the mediator
text(x = 0.005, y = 3.2, 
     labels = paste0(mediator_percent, "% of the effect is through the mediator"),
     pos = 4, cex = 0.9, font = 2, col = "darkblue")
dev.off()


### Subjecthood citizenship #####
summary(d$subject_citizenship)

med.fit2 <- glmer(subject_citizenship ~ complaint 
                 + (1 | cityid), 
                 data = d_complete, 
                 family = binomial, nAGQ = 0, control = glmerControl(optimizer = "nloptwrap", calc.derivs = FALSE)
)

out.fit2 <- glmer(concrete ~ complaint*subject_citizenship
                  + (1 + complaint | cityid), 
                  data = d_complete, 
                  family = binomial, nAGQ = 0, control = glmerControl(optimizer = "nloptwrap", calc.derivs = FALSE)
)

# Perform mediation analysis with very simplified models
set.seed(1)
med.out2 <- mediate(med.fit2, out.fit2, 
                    treat = "complaint", mediator = "subject_citizenship", 
                    sims = 300)

# Summarize mediation analysis results
summary(med.out2)
pdf("output/main/Figure 6b Mediation.pdf", width = 7.5, height = 6) 
plot(med.out2, main="H2b: Subjecthood Citizenship Mechanism")
(temp=summary(med.out2))
(mediator_percent = round(temp$n.avg * 100,2)) 
text(x = 0.005, y = 3.2, 
     labels = paste0(mediator_percent, "% of the effect is through the mediator"),
     pos = 4, cex = 0.9, font = 2, col = "darkblue")
dev.off()


### 12345 hotline #####
summary(d$from_hotline12345)

med.fit3 <- glmer(from_hotline12345 ~ complaint 
                  + (1 | cityid), 
                  data = d_complete, 
                  family = binomial, nAGQ = 0, control = glmerControl(optimizer = "nloptwrap", calc.derivs = FALSE)
)

out.fit3 <- glmer(concrete ~ complaint*from_hotline12345
                  + (1 + complaint | cityid), 
                  data = d_complete, 
                  family = binomial, nAGQ = 0, control = glmerControl(optimizer = "nloptwrap", calc.derivs = FALSE)
)

# Perform mediation analysis with very simplified models
set.seed(1)
med.out3 <- mediate(med.fit3, out.fit3, 
                    treat = "complaint", mediator = "from_hotline12345", 
                    sims = 300)

# Summarize mediation analysis results
summary(med.out3)
pdf("output/main/Figure 6c Mediation.pdf", width = 7.5, height = 6) 
plot(med.out3, main="H2c: Risk Aversion Mechanism\n (From 12345 hotlines)")
(temp=summary(med.out3))
(mediator_percent = round(temp$n.avg * 100,2)) 
text(x = 0.005, y = 3.2, 
     labels = paste0(mediator_percent, "% of the effect is through the mediator"),
     pos = 4, cex = 0.9, font = 2, col = "darkblue")
dev.off()



### Protest terms #####
summary(d$protest_risk)

med.fit4 <- glmer(protest_risk ~ complaint 
                  + (1 | cityid), 
                  data = d_complete, 
                  family = binomial, nAGQ = 0, control = glmerControl(optimizer = "nloptwrap", calc.derivs = FALSE)
)

out.fit4 <- glmer(concrete ~ complaint*protest_risk
                  + (1 + complaint | cityid), 
                  data = d_complete, 
                  family = binomial, nAGQ = 0, control = glmerControl(optimizer = "nloptwrap", calc.derivs = FALSE)
)

# Perform mediation analysis with very simplified models
set.seed(1)
med.out4 <- mediate(med.fit4, out.fit4, 
                    treat = "complaint", mediator = "protest_risk", 
                    sims = 300)

# Summarize mediation analysis results
summary(med.out4)
pdf("output/main/Figure 6d Mediation.pdf", width = 7.5, height = 6) 
plot(med.out4, main="H2c: Risk Aversion Mechanism\n (Containing protest terms)")
(temp=summary(med.out4))
(mediator_percent = round(temp$n.avg * 100,2)) 
text(x = 0.005, y = 3.2, 
     labels = paste0(mediator_percent, "% of the effect is through the mediator"),
     pos = 4, cex = 0.9, font = 2, col = "darkblue")
dev.off()










#____________####
#--------------------------------------------------------------------------------#
# Extension ####
#--------------------------------------------------------------------------------#

## Wuhan Evidence ###### 
# See "3.Extension_Wuhan.R".










